/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    KDTree.java
 *    Copyright (C) 2000-2012 University of Waikato
 *    
 */

package weka.core.neighboursearch;

import java.util.Collections;
import java.util.Enumeration;
import java.util.Vector;

import weka.core.DistanceFunction;
import weka.core.EuclideanDistance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.neighboursearch.kdtrees.KDTreeNode;
import weka.core.neighboursearch.kdtrees.KDTreeNodeSplitter;
import weka.core.neighboursearch.kdtrees.SlidingMidPointOfWidestSide;

/**
 * <!-- globalinfo-start --> Class implementing the KDTree search algorithm for
 * nearest neighbour search.<br/>
 * The connection to dataset is only a reference. For the tree structure the
 * indexes are stored in an array. <br/>
 * Building the tree:<br/>
 * If a node has &lt;maximal-inst-number&gt; (option -L) instances no further
 * splitting is done. Also if the split would leave one side empty, the branch
 * is not split any further even if the instances in the resulting node are more
 * than &lt;maximal-inst-number&gt; instances.<br/>
 * **PLEASE NOTE:** The algorithm can not handle missing values, so it is
 * advisable to run ReplaceMissingValues filter if there are any missing values
 * in the dataset.<br/>
 * <br/>
 * For more information see:<br/>
 * <br/>
 * Jerome H. Friedman, Jon Luis Bentley, Raphael Ari Finkel (1977). An Algorithm
 * for Finding Best Matches in Logarithmic Expected Time. ACM Transactions on
 * Mathematics Software. 3(3):209-226.<br/>
 * <br/>
 * Andrew Moore (1991). A tutorial on kd-trees.
 * <p/>
 * <!-- globalinfo-end -->
 * 
 * <!-- technical-bibtex-start --> BibTeX:
 * 
 * <pre>
 * &#64;article{Friedman1977,
 *    author = {Jerome H. Friedman and Jon Luis Bentley and Raphael Ari Finkel},
 *    journal = {ACM Transactions on Mathematics Software},
 *    month = {September},
 *    number = {3},
 *    pages = {209-226},
 *    title = {An Algorithm for Finding Best Matches in Logarithmic Expected Time},
 *    volume = {3},
 *    year = {1977}
 * }
 * 
 * &#64;techreport{Moore1991,
 *    author = {Andrew Moore},
 *    booktitle = {University of Cambridge Computer Laboratory Technical Report No. 209},
 *    howpublished = {Extract from PhD Thesis},
 *    title = {A tutorial on kd-trees},
 *    year = {1991},
 *    HTTP = {Available from http://www.autonlab.org/autonweb/14665.html}
 * }
 * </pre>
 * <p/>
 * <!-- technical-bibtex-end -->
 * 
 * <!-- options-start --> Valid options are:
 * <p/>
 * 
 * <pre>
 *  -S &lt;classname and options&gt;
 *  Node splitting method to use.
 *  (default: weka.core.neighboursearch.kdtrees.SlidingMidPointOfWidestSide)
 * </pre>
 * 
 * <pre>
 *  -W &lt;value&gt;
 *  Set minimal width of a box
 *  (default: 1.0E-2).
 * </pre>
 * 
 * <pre>
 *  -L
 *  Maximal number of instances in a leaf
 *  (default: 40).
 * </pre>
 * 
 * <pre>
 *  -N
 *  Normalizing will be done
 *  (Select dimension for split, with normalising to universe).
 * </pre>
 * 
 * <!-- options-end -->
 * 
 * @author Gabi Schmidberger (gabi[at-the-rate]cs[dot]waikato[dot]ac[dot]nz)
 * @author Malcolm Ware (mfw4[at-the-rate]cs[dot]waikato[dot]ac[dot]nz)
 * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz)
 * @version $Revision$
 */
public class KDTree extends NearestNeighbourSearch implements TechnicalInformationHandler {

    /** For serialization. */
    private static final long serialVersionUID = 1505717283763272533L;

    /**
     * Array holding the distances of the nearest neighbours. It is filled up both
     * by nearestNeighbour() and kNearestNeighbours().
     */
    protected double[] m_DistanceList;

    /**
     * Indexlist of the instances of this kdtree. Instances get sorted according to
     * the splits. the nodes of the KDTree just hold their start and end indices
     */
    protected int[] m_InstList;

    /** The root node of the tree. */
    protected KDTreeNode m_Root;

    /** The node splitter. */
    protected KDTreeNodeSplitter m_Splitter = new SlidingMidPointOfWidestSide();

    /** Tree stats. */
    protected int m_NumNodes, m_NumLeaves, m_MaxDepth;

    /** Tree Stats variables. */
    protected TreePerformanceStats m_TreeStats = null;

    // Constants
    /** The index of MIN value in attributes' range array. */
    public static final int MIN = EuclideanDistance.R_MIN;

    /** The index of MAX value in attributes' range array. */
    public static final int MAX = EuclideanDistance.R_MAX;

    /** The index of WIDTH (MAX-MIN) value in attributes' range array. */
    public static final int WIDTH = EuclideanDistance.R_WIDTH;

    /**
     * Returns an instance of a TechnicalInformation object, containing detailed
     * information about the technical background of this class, e.g., paper
     * reference or book this class is based on.
     * 
     * @return the technical information about this class
     */
    public TechnicalInformation getTechnicalInformation() {
        TechnicalInformation result;
        TechnicalInformation additional;

        result = new TechnicalInformation(Type.ARTICLE);
        result.setValue(Field.AUTHOR, "Jerome H. Friedman and Jon Luis Bentley and Raphael Ari Finkel");
        result.setValue(Field.YEAR, "1977");
        result.setValue(Field.TITLE, "An Algorithm for Finding Best Matches in Logarithmic Expected Time");
        result.setValue(Field.JOURNAL, "ACM Transactions on Mathematics Software");
        result.setValue(Field.PAGES, "209-226");
        result.setValue(Field.MONTH, "September");
        result.setValue(Field.VOLUME, "3");
        result.setValue(Field.NUMBER, "3");

        additional = result.add(Type.TECHREPORT);
        additional.setValue(Field.AUTHOR, "Andrew Moore");
        additional.setValue(Field.YEAR, "1991");
        additional.setValue(Field.TITLE, "A tutorial on kd-trees");
        additional.setValue(Field.HOWPUBLISHED, "Extract from PhD Thesis");
        additional.setValue(Field.BOOKTITLE, "University of Cambridge Computer Laboratory Technical Report No. 209");
        additional.setValue(Field.HTTP, "Available from http://www.autonlab.org/autonweb/14665.html");

        return result;
    }

    /**
     * Creates a new instance of KDTree.
     */
    public KDTree() {
        super();
        if (getMeasurePerformance())
            m_Stats = m_TreeStats = new TreePerformanceStats();
    }

    /**
     * Creates a new instance of KDTree. It also builds the tree on supplied set of
     * Instances.
     * 
     * @param insts The instances/points on which the BallTree should be built on.
     */
    public KDTree(Instances insts) {
        super(insts);
        if (getMeasurePerformance())
            m_Stats = m_TreeStats = new TreePerformanceStats();
    }

    /**
     * Builds the KDTree on the supplied set of instances/points. It is adviseable
     * to run the replace missing attributes filter on the passed instances first.
     * NOTE: This method should not be called from outside this class. Outside
     * classes should call setInstances(Instances) instead.
     * 
     * @param instances The instances to build the tree on
     * @throws Exception if something goes wrong
     */
    protected void buildKDTree(Instances instances) throws Exception {

        checkMissing(instances);
        if (m_EuclideanDistance == null)
            m_DistanceFunction = m_EuclideanDistance = new EuclideanDistance(instances);
        else
            m_EuclideanDistance.setInstances(instances);

        m_Instances = instances;
        int numInst = m_Instances.numInstances();

        // Make the global index list
        m_InstList = new int[numInst];

        for (int i = 0; i < numInst; i++) {
            m_InstList[i] = i;
        }

        double[][] universe = m_EuclideanDistance.getRanges();

        // initializing internal fields of KDTreeSplitter
        m_Splitter.setInstances(m_Instances);
        m_Splitter.setInstanceList(m_InstList);
        m_Splitter.setEuclideanDistanceFunction(m_EuclideanDistance);
        m_Splitter.setNodeWidthNormalization(m_NormalizeNodeWidth);

        // building tree
        m_NumNodes = m_NumLeaves = 1;
        m_MaxDepth = 0;
        m_Root = new KDTreeNode(m_NumNodes, 0, m_Instances.numInstances() - 1, universe);

        splitNodes(m_Root, universe, m_MaxDepth + 1);
    }

    /**
     * Recursively splits nodes of a tree starting from the supplied node. The
     * splitting stops for any node for which the number of instances/points falls
     * below a given threshold (given by m_MaxInstInLeaf), or if the maximum
     * relative width/range of the instances/points (i.e. max_i(max(att_i) -
     * min(att_i)) ) falls below a given threshold (given by m_MinBoxRelWidth).
     * 
     * @param node     The node to start splitting from.
     * @param universe The attribute ranges of the whole dataset.
     * @param depth    The depth of the supplied node.
     * @throws Exception If there is some problem splitting.
     */
    protected void splitNodes(KDTreeNode node, double[][] universe, int depth) throws Exception {
        double[][] nodeRanges = m_EuclideanDistance.initializeRanges(m_InstList, node.m_Start, node.m_End);
        if (node.numInstances() <= m_MaxInstInLeaf || getMaxRelativeNodeWidth(nodeRanges, universe) <= m_MinBoxRelWidth)
            return;

        // splitting a node so it is no longer a leaf
        m_NumLeaves--;

        if (depth > m_MaxDepth)
            m_MaxDepth = depth;

        m_Splitter.splitNode(node, m_NumNodes, nodeRanges, universe);
        m_NumNodes += 2;
        m_NumLeaves += 2;

        splitNodes(node.m_Left, universe, depth + 1);
        splitNodes(node.m_Right, universe, depth + 1);
    }

    /**
     * Returns (in the supplied heap object) the k nearest neighbours of the given
     * instance starting from the give tree node. &gt;k neighbours are returned if
     * there are more than one neighbours at the kth boundary. NOTE: This method
     * should not be used from outside this class. Outside classes should call
     * kNearestNeighbours(Instance, int).
     * 
     * @param target            The instance to find the nearest neighbours for.
     * @param node              The KDTreeNode to start the search from.
     * @param k                 The number of neighbours to find.
     * @param heap              The MyHeap object to store/update the kNNs found
     *                          during the search.
     * @param distanceToParents The distance of the supplied target to the parents
     *                          of the supplied tree node.
     * @throws Exception if the nearest neighbour could not be found.
     */
    protected void findNearestNeighbours(Instance target, KDTreeNode node, int k, MyHeap heap, double distanceToParents) throws Exception {
        if (node.isALeaf()) {
            if (m_TreeStats != null) {
                m_TreeStats.updatePointCount(node.numInstances());
                m_TreeStats.incrLeafCount();
            }
            double distance;
            // look at all the instances in this leaf
            for (int idx = node.m_Start; idx <= node.m_End; idx++) {
                if (target == m_Instances.instance(m_InstList[idx])) // for
                                                                     // hold-one-out
                                                                     // cross-validation
                    continue;
                if (heap.size() < k) {
                    distance = m_EuclideanDistance.distance(target, m_Instances.instance(m_InstList[idx]), Double.POSITIVE_INFINITY, m_Stats);
                    heap.put(m_InstList[idx], distance);
                } else {
                    MyHeapElement temp = heap.peek();
                    distance = m_EuclideanDistance.distance(target, m_Instances.instance(m_InstList[idx]), temp.distance, m_Stats);
                    if (distance < temp.distance) {
                        heap.putBySubstitute(m_InstList[idx], distance);
                    } else if (distance == temp.distance) {
                        heap.putKthNearest(m_InstList[idx], distance);
                    }
                } // end else heap.size==k
            } // end for

        } else {
            if (m_TreeStats != null) {
                m_TreeStats.incrIntNodeCount();
            }
            KDTreeNode nearer, further;
            boolean targetInLeft = m_EuclideanDistance.valueIsSmallerEqual(target, node.m_SplitDim, node.m_SplitValue);

            if (targetInLeft) {
                nearer = node.m_Left;
                further = node.m_Right;
            } else {
                nearer = node.m_Right;
                further = node.m_Left;
            }
            findNearestNeighbours(target, nearer, k, heap, distanceToParents);

            // ... now look in further half if maxDist reaches into it
            if (heap.size() < k) { // if haven't found the first k
                double distanceToSplitPlane = distanceToParents + m_EuclideanDistance.sqDifference(node.m_SplitDim, target.value(node.m_SplitDim), node.m_SplitValue);
                findNearestNeighbours(target, further, k, heap, distanceToSplitPlane);
                return;
            } else { // else see if ball centered at query intersects with the other
                     // side.
                double distanceToSplitPlane = distanceToParents + m_EuclideanDistance.sqDifference(node.m_SplitDim, target.value(node.m_SplitDim), node.m_SplitValue);
                if (heap.peek().distance >= distanceToSplitPlane) {
                    findNearestNeighbours(target, further, k, heap, distanceToSplitPlane);
                }
            } // end else
        } // end else_if an internal node
    }

    /**
     * Returns the k nearest neighbours of the supplied instance. &gt;k neighbours
     * are returned if there are more than one neighbours at the kth boundary.
     * 
     * @param target The instance to find the nearest neighbours for.
     * @param k      The number of neighbours to find.
     * @return The k nearest neighbours (or &gt;k if more there are than one
     *         neighbours at the kth boundary).
     * @throws Exception if the nearest neighbour could not be found.
     */
    public Instances kNearestNeighbours(Instance target, int k) throws Exception {
        checkMissing(target);

        if (m_Stats != null)
            m_Stats.searchStart();

        MyHeap heap = new MyHeap(k);
        findNearestNeighbours(target, m_Root, k, heap, 0.0);

        if (m_Stats != null)
            m_Stats.searchFinish();

        Instances neighbours = new Instances(m_Instances, (heap.size() + heap.noOfKthNearest()));
        m_DistanceList = new double[heap.size() + heap.noOfKthNearest()];
        int[] indices = new int[heap.size() + heap.noOfKthNearest()];
        int i = indices.length - 1;
        MyHeapElement h;
        while (heap.noOfKthNearest() > 0) {
            h = heap.getKthNearest();
            indices[i] = h.index;
            m_DistanceList[i] = h.distance;
            i--;
        }
        while (heap.size() > 0) {
            h = heap.get();
            indices[i] = h.index;
            m_DistanceList[i] = h.distance;
            i--;
        }
        m_DistanceFunction.postProcessDistances(m_DistanceList);

        for (int idx = 0; idx < indices.length; idx++) {
            neighbours.add(m_Instances.instance(indices[idx]));
        }

        return neighbours;
    }

    /**
     * Returns the nearest neighbour of the supplied target instance.
     * 
     * @param target The instance to find the nearest neighbour for.
     * @return The nearest neighbour from among the previously supplied training
     *         instances.
     * @throws Exception if the neighbours could not be found.
     */
    public Instance nearestNeighbour(Instance target) throws Exception {
        return (kNearestNeighbours(target, 1)).instance(0);
    }

    /**
     * Returns the distances to the kNearest or 1 nearest neighbour currently found
     * with either the kNearestNeighbours or the nearestNeighbour method.
     * 
     * @return array containing the distances of the nearestNeighbours. The length
     *         and ordering of the array is the same as that of the instances
     *         returned by nearestNeighbour functions.
     * @throws Exception if called before calling kNearestNeighbours or
     *                   nearestNeighbours.
     */
    public double[] getDistances() throws Exception {
        if (m_Instances == null || m_DistanceList == null)
            throw new Exception("The tree has not been supplied with a set of " + "instances or getDistances() has been called " + "before calling kNearestNeighbours().");
        return m_DistanceList;
    }

    /**
     * Builds the KDTree on the given set of instances.
     * 
     * @param instances The insts on which the KDTree is to be built.
     * @throws Exception If some error occurs while building the KDTree
     */
    public void setInstances(Instances instances) throws Exception {
        super.setInstances(instances);
        buildKDTree(instances);
    }

    /**
     * Adds one instance to the KDTree. This updates the KDTree structure to take
     * into account the newly added training instance.
     * 
     * @param instance the instance to be added. Usually the newly added instance in
     *                 the training set.
     * @throws Exception If the instance cannot be added.
     */
    public void update(Instance instance) throws Exception { // better to change
                                                             // to addInstance
        if (m_Instances == null)
            throw new Exception("No instances supplied yet. Have to call " + "setInstances(instances) with a set of Instances " + "first.");

        addInstanceInfo(instance);
        addInstanceToTree(instance, m_Root);
    }

    /**
     * Recursively adds an instance to the tree starting from the supplied
     * KDTreeNode. NOTE: This should not be called by outside classes, outside
     * classes should instead call update(Instance) method.
     * 
     * @param inst The instance to add to the tree
     * @param node The node to start the recursive search from, for the leaf node
     *             where the supplied instance would go.
     * @throws Exception If some error occurs while adding the instance.
     */
    protected void addInstanceToTree(Instance inst, KDTreeNode node) throws Exception {
        if (node.isALeaf()) {
            int instList[] = new int[m_Instances.numInstances()];
            try {
                System.arraycopy(m_InstList, 0, instList, 0, node.m_End + 1); // m_InstList.squeezeIn(m_End,
                                                                              // index);
                if (node.m_End < m_InstList.length - 1)
                    System.arraycopy(m_InstList, node.m_End + 1, instList, node.m_End + 2, m_InstList.length - node.m_End - 1);
                instList[node.m_End + 1] = m_Instances.numInstances() - 1;
            } catch (ArrayIndexOutOfBoundsException ex) {
                System.err.println("m_InstList.length: " + m_InstList.length + " instList.length: " + instList.length + "node.m_End+1: " + (node.m_End + 1) + "m_InstList.length-node.m_End+1: " + (m_InstList.length - node.m_End - 1));
                throw ex;
            }
            m_InstList = instList;

            node.m_End++;
            node.m_NodeRanges = m_EuclideanDistance.updateRanges(inst, node.m_NodeRanges);

            m_Splitter.setInstanceList(m_InstList);

            // split this leaf node if necessary
            double[][] universe = m_EuclideanDistance.getRanges();
            if (node.numInstances() > m_MaxInstInLeaf && getMaxRelativeNodeWidth(node.m_NodeRanges, universe) > m_MinBoxRelWidth) {
                m_Splitter.splitNode(node, m_NumNodes, node.m_NodeRanges, universe);
                m_NumNodes += 2;
            }
        } // end if node is a leaf
        else {
            if (m_EuclideanDistance.valueIsSmallerEqual(inst, node.m_SplitDim, node.m_SplitValue)) {
                addInstanceToTree(inst, node.m_Left);
                afterAddInstance(node.m_Right);
            } else
                addInstanceToTree(inst, node.m_Right);

            node.m_End++;
            node.m_NodeRanges = m_EuclideanDistance.updateRanges(inst, node.m_NodeRanges);
        }
    }

    /**
     * Corrects the start and end indices of a KDTreeNode after an instance is added
     * to the tree. The start and end indices for the master index array
     * (m_InstList) stored in the nodes need to be updated for all nodes in the
     * subtree on the right of a node where the instance was added. NOTE: No outside
     * class should call this method.
     * 
     * @param node KDTreeNode whose start and end indices need to be updated.
     */
    protected void afterAddInstance(KDTreeNode node) {
        node.m_Start++;
        node.m_End++;
        if (!node.isALeaf()) {
            afterAddInstance(node.m_Left);
            afterAddInstance(node.m_Right);
        }
    }

    /**
     * Adds one instance to KDTree loosly. It only changes the ranges in
     * EuclideanDistance, and does not affect the structure of the KDTree.
     * 
     * @param instance the new instance. Usually this is the test instance supplied
     *                 to update the range of attributes in the distance function.
     */
    public void addInstanceInfo(Instance instance) {
        m_EuclideanDistance.updateRanges(instance);
    }

    /**
     * Checks if there is any instance with missing values. Throws an exception if
     * there is, as KDTree does not handle missing values.
     * 
     * @param instances the instances to check
     * @throws Exception if missing values are encountered
     */
    protected void checkMissing(Instances instances) throws Exception {
        for (int i = 0; i < instances.numInstances(); i++) {
            Instance ins = instances.instance(i);
            for (int j = 0; j < ins.numValues(); j++) {
                if (ins.index(j) != ins.classIndex())
                    if (ins.isMissingSparse(j)) {
                        throw new Exception("ERROR: KDTree can not deal with missing " + "values. Please run ReplaceMissingValues filter " + "on the dataset before passing it on to the KDTree.");
                    }
            }
        }
    }

    /**
     * Checks if there is any missing value in the given instance.
     * 
     * @param ins The instance to check missing values in.
     * @throws Exception If there is a missing value in the instance.
     */
    protected void checkMissing(Instance ins) throws Exception {
        for (int j = 0; j < ins.numValues(); j++) {
            if (ins.index(j) != ins.classIndex())
                if (ins.isMissingSparse(j)) {
                    throw new Exception("ERROR: KDTree can not deal with missing " + "values. Please run ReplaceMissingValues filter " + "on the dataset before passing it on to the KDTree.");
                }
        }
    }

    /**
     * Returns the maximum attribute width of instances/points in a KDTreeNode
     * relative to the whole dataset.
     * 
     * @param nodeRanges The attribute ranges of the KDTreeNode whose maximum
     *                   relative width is to be determined.
     * @param universe   The attribute ranges of the whole dataset (training
     *                   instances + test instances so far encountered).
     * @return The maximum relative width
     */
    protected double getMaxRelativeNodeWidth(double[][] nodeRanges, double[][] universe) {
        int widest = widestDim(nodeRanges, universe);
        if (widest < 0)
            return 0.0;
        else
            return nodeRanges[widest][WIDTH] / universe[widest][WIDTH];
    }

    /**
     * Returns the widest dimension/attribute in a KDTreeNode (widest after
     * normalizing).
     * 
     * @param nodeRanges The attribute ranges of the KDTreeNode.
     * @param universe   The attribute ranges of the whole dataset (training
     *                   instances + test instances so far encountered).
     * @return The index of the widest dimension/attribute.
     */
    protected int widestDim(double[][] nodeRanges, double[][] universe) {
        final int classIdx = m_Instances.classIndex();
        double widest = 0.0;
        int w = -1;
        if (m_NormalizeNodeWidth) {
            for (int i = 0; i < nodeRanges.length; i++) {
                double newWidest = nodeRanges[i][WIDTH] / universe[i][WIDTH];
                if (newWidest > widest) {
                    if (i == classIdx)
                        continue;
                    widest = newWidest;
                    w = i;
                }
            }
        } else {
            for (int i = 0; i < nodeRanges.length; i++) {
                if (nodeRanges[i][WIDTH] > widest) {
                    if (i == classIdx)
                        continue;
                    widest = nodeRanges[i][WIDTH];
                    w = i;
                }
            }
        }
        return w;
    }

    /**
     * Returns the size of the tree.
     * 
     * @return the size of the tree
     */
    public double measureTreeSize() {
        return m_NumNodes;
    }

    /**
     * Returns the number of leaves.
     * 
     * @return the number of leaves
     */
    public double measureNumLeaves() {
        return m_NumLeaves;
    }

    /**
     * Returns the depth of the tree.
     * 
     * @return The depth of the tree
     */
    public double measureMaxDepth() {
        return m_MaxDepth;
    }

    /**
     * Returns an enumeration of the additional measure names.
     * 
     * @return an enumeration of the measure names
     */
    public Enumeration<String> enumerateMeasures() {
        Vector<String> newVector = new Vector<String>();
        newVector.addElement("measureTreeSize");
        newVector.addElement("measureNumLeaves");
        newVector.addElement("measureMaxDepth");
        if (m_Stats != null) {
            newVector.addAll(Collections.list(m_Stats.enumerateMeasures()));
        }
        return newVector.elements();
    }

    /**
     * Returns the value of the named measure.
     * 
     * @param additionalMeasureName the name of the measure to query for its value.
     * @return The value of the named measure
     * @throws IllegalArgumentException If the named measure is not supported.
     */
    public double getMeasure(String additionalMeasureName) {
        if (additionalMeasureName.compareToIgnoreCase("measureMaxDepth") == 0) {
            return measureMaxDepth();
        } else if (additionalMeasureName.compareToIgnoreCase("measureTreeSize") == 0) {
            return measureTreeSize();
        } else if (additionalMeasureName.compareToIgnoreCase("measureNumLeaves") == 0) {
            return measureNumLeaves();
        } else if (m_Stats != null) {
            return m_Stats.getMeasure(additionalMeasureName);
        } else {
            throw new IllegalArgumentException(additionalMeasureName + " not supported (KDTree)");
        }
    }

    /**
     * Sets whether to calculate the performance statistics or not.
     * 
     * @param measurePerformance Should be true if performance statistics are to be
     *                           measured.
     */
    public void setMeasurePerformance(boolean measurePerformance) {
        m_MeasurePerformance = measurePerformance;
        if (m_MeasurePerformance) {
            if (m_Stats == null)
                m_Stats = m_TreeStats = new TreePerformanceStats();
        } else
            m_Stats = m_TreeStats = null;
    }

    /**
     * Assigns instances to centers using KDTree.
     * 
     * @param centers     the current centers
     * @param assignments the centerindex for each instance
     * @param pc          the threshold value for pruning.
     * @throws Exception If there is some problem assigning instances to centers.
     */
    public void centerInstances(Instances centers, int[] assignments, double pc) throws Exception {

        int[] centList = new int[centers.numInstances()];
        for (int i = 0; i < centers.numInstances(); i++)
            centList[i] = i;

        determineAssignments(m_Root, centers, centList, assignments, pc);
    }

    /**
     * Assigns instances to the current centers called candidates.
     * 
     * @param node        The node to start assigning the instances from.
     * @param centers     all the current centers.
     * @param candidates  the current centers the method works on.
     * @param assignments the center index for each instance.
     * @param pc          the threshold value for pruning.
     * @throws Exception If there is some problem assigning instances to centers.
     */
    protected void determineAssignments(KDTreeNode node, Instances centers, int[] candidates, int[] assignments, double pc) throws Exception {

        // reduce number of owners for current hyper rectangle
        int[] owners = refineOwners(node, centers, candidates);

        // only one owner
        if (owners.length == 1) {
            // all instances of this node are owned by one center
            for (int i = node.m_Start; i <= node.m_End; i++) {
                assignments[m_InstList[i]] // the assignment of this instance
                        = owners[0]; // is the current owner
            }
        } else if (!node.isALeaf()) {
            // more than one owner and it is not a leaf
            determineAssignments(node.m_Left, centers, owners, assignments, pc);
            determineAssignments(node.m_Right, centers, owners, assignments, pc);
        } else {
            // this is a leaf and there are more than 1 owner
            // XMeans.
            assignSubToCenters(node, centers, owners, assignments);
        }
    }

    /**
     * Refines the ownerlist.
     * 
     * @param node       The current tree node.
     * @param centers    all centers
     * @param candidates the indexes of those centers that are candidates.
     * @return list of owners
     * @throws Exception If some problem occurs in refining.
     */
    protected int[] refineOwners(KDTreeNode node, Instances centers, int[] candidates) throws Exception {

        int[] owners = new int[candidates.length];
        double minDistance = Double.POSITIVE_INFINITY;
        int ownerIndex = -1;
        Instance owner;
        int numCand = candidates.length;
        double[] distance = new double[numCand];
        boolean[] inside = new boolean[numCand];
        for (int i = 0; i < numCand; i++) {
            distance[i] = distanceToHrect(node, centers.instance(candidates[i]));
            inside[i] = (distance[i] == 0.0);
            if (distance[i] < minDistance) {
                minDistance = distance[i];
                ownerIndex = i;
            }
        }
        owner = (Instance) centers.instance(candidates[ownerIndex]).copy();

        // are there other owners
        // loop also goes over already found owner, keeps order
        // in owner list
        int index = 0;
        for (int i = 0; i < numCand; i++) {
            // 1. all centers that are points within rectangle are owners
            if ((inside[i])

                    // 2. take all points with same distance to the rect. as the owner
                    || (distance[i] == distance[ownerIndex])) {

                // add competitor to owners list
                owners[index++] = candidates[i];
            } else {

                Instance competitor = (Instance) centers.instance(candidates[i]).copy();
                if

                // 3. point has larger distance to rectangle but still can compete
                // with owner for some points in the rectangle
                (!candidateIsFullOwner(node, owner, competitor))

                {
                    // also add competitor to owners list
                    owners[index++] = candidates[i];
                }
            }
        }
        int[] result = new int[index];
        for (int i = 0; i < index; i++)
            result[i] = owners[i];
        return result;
    }

    /**
     * Returns the distance between a point and an hyperrectangle.
     * 
     * @param node The current node from whose hyperrectangle the distance is to be
     *             measured.
     * @param x    the point
     * @return the distance
     * @throws Exception If some problem occurs in determining the distance to the
     *                   hyperrectangle.
     */
    protected double distanceToHrect(KDTreeNode node, Instance x) throws Exception {
        double distance = 0.0;

        Instance closestPoint = (Instance) x.copy();
        boolean inside;
        inside = clipToInsideHrect(node, closestPoint);
        if (!inside)
            distance = m_EuclideanDistance.distance(closestPoint, x);
        return distance;
    }

    /**
     * Finds the closest point in the hyper rectangle to a given point. Change the
     * given point to this closest point by clipping of at all the dimensions to be
     * clipped of. If the point is inside the rectangle it stays unchanged. The
     * return value is true if the point was not changed, so the the return value is
     * true if the point was inside the rectangle.
     * 
     * @param node The current KDTreeNode in whose hyperrectangle the closest point
     *             is to be found.
     * @param x    a point
     * @return true if the input point stayed unchanged.
     */
    protected boolean clipToInsideHrect(KDTreeNode node, Instance x) {
        boolean inside = true;
        for (int i = 0; i < m_Instances.numAttributes(); i++) {
            // TODO treat nominals differently!??

            if (x.value(i) < node.m_NodeRanges[i][MIN]) {
                x.setValue(i, node.m_NodeRanges[i][MIN]);
                inside = false;
            } else if (x.value(i) > node.m_NodeRanges[i][MAX]) {
                x.setValue(i, node.m_NodeRanges[i][MAX]);
                inside = false;
            }
        }
        return inside;
    }

    /**
     * Returns true if candidate is a full owner in respect to a competitor.
     * <p>
     * 
     * The candidate has been the closer point to the current rectangle or even has
     * been a point within the rectangle. The competitor is competing with the
     * candidate for a few points out of the rectangle although it is a point
     * further away from the rectangle then the candidate. The extrem point is the
     * corner of the rectangle that is furthest away from the candidate towards the
     * direction of the competitor.
     * 
     * If the distance candidate to this extreme point is smaller then the distance
     * competitor to this extreme point, then it is proven that none of the points
     * in the rectangle can be owned be the competitor and the candidate is full
     * owner of the rectangle in respect to this competitor. See also D. Pelleg and
     * A. Moore's paper 'Accelerating exact k-means Algorithms with Geometric
     * Reasoning'.
     * <p>
     * 
     * @param node       The current KDTreeNode / hyperrectangle.
     * @param candidate  instance that is candidate to be owner
     * @param competitor instance that competes against the candidate
     * @return true if candidate is full owner
     * @throws Exception If some problem occurs.
     */
    protected boolean candidateIsFullOwner(KDTreeNode node, Instance candidate, Instance competitor) throws Exception {
        // get extreme point
        double[] extreme = new double[m_Instances.numAttributes()];
        for (int i = 0; i < m_Instances.numAttributes(); i++) {
            if ((competitor.value(i) - candidate.value(i)) > 0) {
                extreme[i] = node.m_NodeRanges[i][MAX];
            } else {
                extreme[i] = node.m_NodeRanges[i][MIN];
            }
        }
        Instance extremeI = candidate.copy(extreme);
        boolean isFullOwner = m_EuclideanDistance.distance(extremeI, candidate) < m_EuclideanDistance.distance(extremeI, competitor);

        return isFullOwner;
    }

    /**
     * Assigns instances of this node to center. Center to be assign to is decided
     * by the distance function.
     * 
     * @param node        The KDTreeNode whose instances are to be assigned.
     * @param centers     all the input centers
     * @param centList    the list of centers to work with
     * @param assignments index list of last assignments
     * @throws Exception If there is error assigning the instances.
     */
    public void assignSubToCenters(KDTreeNode node, Instances centers, int[] centList, int[] assignments) throws Exception {
        // todo: undecided situations

        // WARNING: assignments is "input/output-parameter"
        // should not be null and the following should not happen
        if (assignments == null) {
            assignments = new int[m_Instances.numInstances()];
            for (int i = 0; i < assignments.length; i++) {
                assignments[i] = -1;
            }
        }

        // set assignments for all instances of this node
        for (int i = node.m_Start; i <= node.m_End; i++) {
            int instIndex = m_InstList[i];
            Instance inst = m_Instances.instance(instIndex);
            // if (instList[i] == 664) System.out.println("664***");
            int newC = m_EuclideanDistance.closestPoint(inst, centers, centList);
            // int newC = clusterProcessedInstance(inst, centers);
            assignments[instIndex] = newC;
        }
    }

    /**
     * Properties' variables =====================================================
     */

    /** flag for normalizing. */
    boolean m_NormalizeNodeWidth = true;

    /** The euclidean distance function to use. */
    protected EuclideanDistance m_EuclideanDistance;
    { // to make sure we have only one object of EuclideanDistance
        if (m_DistanceFunction instanceof EuclideanDistance)
            m_EuclideanDistance = (EuclideanDistance) m_DistanceFunction;
        else
            m_DistanceFunction = m_EuclideanDistance = new EuclideanDistance();
    }

    /** minimal relative width of a KDTree rectangle. */
    protected double m_MinBoxRelWidth = 1.0E-2;

    /** maximal number of instances in a leaf. */
    protected int m_MaxInstInLeaf = 40;

    /**
     * the GET and SET - functions ===============================================
     */

    /**
     * Tip text for this property.
     * 
     * @return the tip text for this property
     */
    public String minBoxRelWidthTipText() {
        return "The minimum relative width of the box. A node is only made a leaf " + "if the width of the split dimension of the instances in a node " + "normalized over the width of the split dimension of all the " + "instances is less than or equal to this minimum relative width.";
    }

    /**
     * Sets the minimum relative box width.
     * 
     * @param i the minimum relative box width
     */
    public void setMinBoxRelWidth(double i) {
        m_MinBoxRelWidth = i;
    }

    /**
     * Gets the minimum relative box width.
     * 
     * @return the minimum relative box width
     */
    public double getMinBoxRelWidth() {
        return m_MinBoxRelWidth;
    }

    /**
     * Tip text for this property.
     * 
     * @return the tip text for this property
     */
    public String maxInstInLeafTipText() {
        return "The max number of instances in a leaf.";
    }

    /**
     * Sets the maximum number of instances in a leaf.
     * 
     * @param i the maximum number of instances in a leaf
     */
    public void setMaxInstInLeaf(int i) {
        m_MaxInstInLeaf = i;
    }

    /**
     * Get the maximum number of instances in a leaf.
     * 
     * @return the maximum number of instances in a leaf
     */
    public int getMaxInstInLeaf() {
        return m_MaxInstInLeaf;
    }

    /**
     * Tip text for this property.
     * 
     * @return the tip text for this property
     */
    public String normalizeNodeWidthTipText() {
        return "Whether if the widths of the KDTree node should be normalized " + "by the width of the universe or not. " + "Where, width of the node is the range of the split attribute " + "based on the instances in that node, and width of the " + "universe is the range of the split attribute based on all the " + "instances (default: false).";
    }

    /**
     * Sets the flag for normalizing the widths of a KDTree Node by the width of the
     * dimension in the universe.
     * 
     * @param n true to use normalizing.
     */
    public void setNormalizeNodeWidth(boolean n) {
        m_NormalizeNodeWidth = n;
    }

    /**
     * Gets the normalize flag.
     * 
     * @return True if normalizing
     */
    public boolean getNormalizeNodeWidth() {
        return m_NormalizeNodeWidth;
    }

    /**
     * returns the distance function currently in use.
     * 
     * @return the distance function
     */
    public DistanceFunction getDistanceFunction() {
        return (DistanceFunction) m_EuclideanDistance;
    }

    /**
     * sets the distance function to use for nearest neighbour search.
     * 
     * @param df the distance function to use
     * @throws Exception if not EuclideanDistance
     */
    public void setDistanceFunction(DistanceFunction df) throws Exception {
        if (!(df instanceof EuclideanDistance))
            throw new Exception("KDTree currently only works with " + "EuclideanDistanceFunction.");
        m_DistanceFunction = m_EuclideanDistance = (EuclideanDistance) df;
    }

    /**
     * Returns the tip text for this property.
     * 
     * @return tip text for this property suitable for displaying in the
     *         explorer/experimenter gui
     */
    public String nodeSplitterTipText() {
        return "The the splitting method to split the nodes of the KDTree.";
    }

    /**
     * Returns the splitting method currently in use to split the nodes of the
     * KDTree.
     * 
     * @return The KDTreeNodeSplitter currently in use.
     */
    public KDTreeNodeSplitter getNodeSplitter() {
        return m_Splitter;
    }

    /**
     * Sets the splitting method to use to split the nodes of the KDTree.
     * 
     * @param splitter The KDTreeNodeSplitter to use.
     */
    public void setNodeSplitter(KDTreeNodeSplitter splitter) {
        m_Splitter = splitter;
    }

    /**
     * Returns a string describing this nearest neighbour search algorithm.
     * 
     * @return a description of the algorithm for displaying in the
     *         explorer/experimenter gui
     */
    public String globalInfo() {
        return "Class implementing the KDTree search algorithm for nearest " + "neighbour search.\n" + "The connection to dataset is only a reference. For the tree " + "structure the indexes are stored in an array. \n" + "Building the tree:\n" + "If a node has <maximal-inst-number> (option -L) instances no " + "further splitting is done. Also if the split would leave one " + "side empty, the branch is not split any further even if the " + "instances in the resulting node are more than " + "<maximal-inst-number> instances.\n" + "**PLEASE NOTE:** The algorithm can not handle missing values, so it " + "is advisable to run ReplaceMissingValues filter if there are any " + "missing values in the dataset.\n\n" + "For more information see:\n\n" + getTechnicalInformation().toString();
    }

    /**
     * Returns an enumeration describing the available options.
     * 
     * @return an enumeration of all the available options.
     */
    public Enumeration<Option> listOptions() {
        Vector<Option> newVector = new Vector<Option>();

        newVector.add(new Option("\tNode splitting method to use.\n" + "\t(default: weka.core.neighboursearch.kdtrees.SlidingMidPointOfWidestSide)", "S", 1, "-S <classname and options>"));

        newVector.addElement(new Option("\tSet minimal width of a box\n" + "\t(default: 1.0E-2).", "W", 1, "-W <value>"));

        newVector.addElement(new Option("\tMaximal number of instances in a leaf\n" + "\t(default: 40).", "L", 1, "-L <value>"));

        newVector.addElement(new Option("\tNormalizing will be done\n" + "\t(Select dimension for split, with normalising to universe).", "N", 0, "-N"));

        newVector.addAll(Collections.list(super.listOptions()));

        return newVector.elements();
    }

    /**
     * Parses a given list of options.
     * <p/>
     * 
     * <!-- options-start --> Valid options are:
     * <p/>
     * 
     * <pre>
     *  -S &lt;classname and options&gt;
     *  Node splitting method to use.
     *  (default: weka.core.neighboursearch.kdtrees.SlidingMidPointOfWidestSide)
     * </pre>
     * 
     * <pre>
     *  -W &lt;value&gt;
     *  Set minimal width of a box
     *  (default: 1.0E-2).
     * </pre>
     * 
     * <pre>
     *  -L
     *  Maximal number of instances in a leaf
     *  (default: 40).
     * </pre>
     * 
     * <pre>
     *  -N
     *  Normalizing will be done
     *  (Select dimension for split, with normalising to universe).
     * </pre>
     * 
     * <!-- options-end -->
     * 
     * @param options the list of options as an array of strings
     * @throws Exception if an option is not supported
     */
    public void setOptions(String[] options) throws Exception {
        super.setOptions(options);

        String optionString = Utils.getOption('S', options);
        if (optionString.length() != 0) {
            String splitMethodSpec[] = Utils.splitOptions(optionString);
            if (splitMethodSpec.length == 0) {
                throw new Exception("Invalid DistanceFunction specification string.");
            }
            String className = splitMethodSpec[0];
            splitMethodSpec[0] = "";

            setNodeSplitter((KDTreeNodeSplitter) Utils.forName(KDTreeNodeSplitter.class, className, splitMethodSpec));
        } else {
            setNodeSplitter(new SlidingMidPointOfWidestSide());
        }

        optionString = Utils.getOption('W', options);
        if (optionString.length() != 0)
            setMinBoxRelWidth(Double.parseDouble(optionString));
        else
            setMinBoxRelWidth(1.0E-2);

        optionString = Utils.getOption('L', options);
        if (optionString.length() != 0)
            setMaxInstInLeaf(Integer.parseInt(optionString));
        else
            setMaxInstInLeaf(40);

        setNormalizeNodeWidth(Utils.getFlag('N', options));

        Utils.checkForRemainingOptions(options);
    }

    /**
     * Gets the current settings of KDtree.
     * 
     * @return an array of strings suitable for passing to setOptions
     */
    public String[] getOptions() {
        Vector<String> result;
        String[] options;
        int i;

        result = new Vector<String>();

        options = super.getOptions();
        for (i = 0; i < options.length; i++)
            result.add(options[i]);

        result.add("-S");
        result.add((m_Splitter.getClass().getName() + " " + Utils.joinOptions(m_Splitter.getOptions())).trim());

        result.add("-W");
        result.add("" + getMinBoxRelWidth());

        result.add("-L");
        result.add("" + getMaxInstInLeaf());

        if (getNormalizeNodeWidth())
            result.add("-N");

        return result.toArray(new String[result.size()]);
    }

}
